######################################################
#WA Analysis 2014 Primary.
# When Women Run, Voters Will Follow (Sometimes): 
##Examining the Mobilizing Effect of Female Candidates in the 2014 and 2018 Midterm Elections 
#By Safarpour, Wyckoff Gaynor, Rouse, and Swers #
######################################################

# Start from an empty workspace so objects left over from an earlier
# session cannot leak into this analysis.
rm(list = ls())

#Packages.
library(arm)
library(plyr)
library(mvtnorm) 
library(stargazer)
library(stats)
library(Hmisc)

# Point R at the replication folder. The CSV read below and the output files
# written later in this script are addressed relative to this directory.
setwd("/Users/ACS/Dropbox/When Women Run/Revision_PoliticalBehavior/R&R Part 2/Publication Docs/Replication Data and Code/")

# Load the WA data; text columns stay character rather than becoming factors.
WA <- read.csv(
  file = "WA2014PrimaryDataFinal.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
names(WA) # list the available columns

# Main model.
# Logistic regression of 2014 primary turnout on the female-candidate
# indicator, registrant gender, three generation indicators, each of their
# interactions with the female-candidate indicator, plus voted2012 and
# obamavote2012.
#
# R orders formula terms by degree, so coef(m1) holds the intercept, then the
# seven main effects in order of first appearance, then the four womanprimary
# interactions. The simulation code further down indexes coefficients by
# position (1-12) and relies on that order.
m1 <- glm(
  voted2014primary ~ womanprimary +
    female + womanprimary * female +
    Millennials2014 + Millennials2014 * womanprimary +
    GenerationX2014 + GenerationX2014 * womanprimary +
    SilentGeneration2014 + SilentGeneration2014 * womanprimary +
    voted2012 + obamavote2012,
  data = WA,
  family = binomial(link = "logit")
)

m1
summary(m1)

# In-sample predicted probabilities, as a quick sanity check on their range.
preds.m1 <- predict(m1, type = "response")
summary(preds.m1)



## Export model results to an HTML table for the paper.
# The covariate labels are listed in the same order as the model's non-intercept
# coefficients: seven main effects first, then the four interactions.
stargazer(
  m1,
  type = "html",
  out = "tableWA2014primary.html",
  title = "Effects of candidate gender, generation, gender, prior voting, and county Obama vote share on 2014 Primary Turnout in WA",
  dep.var.labels = "Voted in WA 2014 Primary Election",
  column.labels = "",
  covariate.labels = c(
    "Female Candidate",
    "Female",
    "Millennials",
    "Generation X",
    "Silent Generation",
    "Voted in 2012 General Election",
    "County Obama Vote 2012",
    "Female Candidate*Female",
    "Female Candidate*Millennial",
    "Female Candidate*Generation X",
    "Female Candidate*Silent Generation"
  ),
  notes = "Results from logistic regression, standard errors in parentheses. Baseline age category: Baby Boomers.",
  notes.align = "l",
  digits = 3,
  single.row = FALSE
)


########################################
## Simulate CIs around predicted effects
########################################
# Draw n_draws coefficient vectors from a multivariate normal centred on the
# fitted coefficients of m1, with the model's estimated covariance matrix.
# Each row of sim_coefs_m1 is one simulated coefficient vector, with columns
# in the same order as coef(m1).
#
# The estimates are passed straight to rmvnorm() rather than being stored in
# objects called `coef` and `vcov`, which would mask stats::coef() and
# stats::vcov() in the global environment.
n_draws <- 1000
set.seed(1714) # fixed seed so the simulated draws are reproducible
sim_coefs_m1 <- rmvnorm(n_draws, mean = coef(m1), sigma = vcov(m1))

# Check that the simulated means are close to the original estimates.
rbind(estimate = coef(m1), sim_mean = colMeans(sim_coefs_m1))


# Create regression sample: keep only rows with no missing value on the
# outcome or on any predictor that enters m1.
# NOTE: this data frame shares its name with base::sample(); the rest of the
# script refers to it by this name.
sample <- WA[
  complete.cases(
    WA[, c("voted2014primary", "womanprimary", "female",
           "Millennials2014", "GenerationX2014",
           "SilentGeneration2014",
           "voted2012", "obamavote2012")]
  ),
]
rm(WA) # the full data set is not needed from here on

# Average predicted turnout over the whole regression sample, once per
# simulated coefficient draw, with the female-candidate indicator set to
# `woman` for every row (0 = male-only candidates, 1 = female candidate).
# Coefficients are addressed by position: 1 = intercept, 2 = womanprimary,
# 3-8 = remaining main effects, 9-12 = womanprimary interactions.
overall_turnout <- function(woman) {
  vapply(seq_len(n_draws), function(k) {
    b <- sim_coefs_m1[k, ]
    mean(invlogit(b[[1]] +
                    b[[2]] * woman +
                    b[[3]] * sample$female +
                    b[[4]] * sample$Millennials2014 +
                    b[[5]] * sample$GenerationX2014 +
                    b[[6]] * sample$SilentGeneration2014 +
                    b[[7]] * sample$voted2012 +
                    b[[8]] * sample$obamavote2012 +
                    b[[9]] * sample$female * woman +
                    b[[10]] * woman * sample$Millennials2014 +
                    b[[11]] * woman * sample$GenerationX2014 +
                    b[[12]] * woman * sample$SilentGeneration2014))
  }, numeric(1))
}

# Predictions for male-only candidates.
ppwomen0.s <- overall_turnout(0)
mean(ppwomen0.s)

# Predictions for a female candidate.
ppwomen1.s <- overall_turnout(1)
mean(ppwomen1.s)


# Draw-by-draw difference: female candidate minus male-only candidates.
effect.01 <- ppwomen1.s - ppwomen0.s
summary(effect.01) #-0.009594  (0-1 women candidate effect)

# CIs and put results together in matrix.
# One row per quantity (male-only prediction, female-candidate prediction,
# and their difference); columns are the 2.5% quantile, the mean, and the
# 97.5% quantile across the simulated draws.
elements <- list(ppwomen0.s, ppwomen1.s, effect.01)
lapply(elements, summary) # See summary for each element of the list
ci2 <- do.call("rbind", lapply(elements, quantile, c(.025, .975))) # Get quantiles for each

# vapply() keeps `results` a plain numeric matrix (lapply() would turn the
# whole matrix into a list-matrix).
results <- cbind(ci2[, 1], vapply(elements, mean, numeric(1)), ci2[, 2])
colnames(results) <- c("2.5", "Mean", "97.5")
rownames(results) <- c("Only Male Candidates", "Female Candidate", "Effect Male only-Female Candidate")
results

# Pull out the overall effect and its interval by row/column NAME.
# The previous positional indexing (results[[3]] / results[[9]]) walked the
# matrix column-major, so element 3 was the 2.5% quantile and element 9 the
# 97.5% quantile -- but they were assigned to `upperci` and `lowerci`
# respectively, i.e. the two bounds were swapped.
meaneffect <- results["Effect Male only-Female Candidate", "Mean"]
lowerci <- results["Effect Male only-Female Candidate", "2.5"]
upperci <- results["Effect Male only-Female Candidate", "97.5"]

## Get Predictions by Generation and by Gender.

# Average predicted turnout within `group` (a subset of the regression
# sample), once per simulated coefficient draw, with the female-candidate
# indicator set to `woman` for every row (0 = male-only candidates,
# 1 = female candidate). Returns a numeric vector of length n_draws.
# Coefficients are addressed by position: 1 = intercept, 2 = womanprimary,
# 3-8 = remaining main effects, 9-12 = womanprimary interactions.
# The terms are summed in the same order as the hand-written version this
# replaces, so the simulated values are unchanged.
sim_group_turnout <- function(group, woman) {
  vapply(seq_len(n_draws), function(k) {
    b <- sim_coefs_m1[k, ]
    mean(invlogit(b[[1]] +
                    b[[2]] * woman +
                    b[[3]] * group$female +
                    b[[4]] * group$Millennials2014 +
                    b[[5]] * group$GenerationX2014 +
                    b[[6]] * group$SilentGeneration2014 +
                    b[[7]] * group$voted2012 +
                    b[[8]] * group$obamavote2012 +
                    b[[9]] * group$female * woman +
                    b[[10]] * woman * group$Millennials2014 +
                    b[[11]] * woman * group$GenerationX2014 +
                    b[[12]] * woman * group$SilentGeneration2014))
  }, numeric(1))
}

# Generation subsets of the regression sample.
SG <- subset(sample, SilentGeneration2014 == 1)
table(SG$SilentGeneration2014, exclude = NULL) # check the subset contains only 1s
BB <- subset(sample, BabyBoomers2014 == 1)
GX <- subset(sample, GenerationX2014 == 1)
M <- subset(sample, Millennials2014 == 1)

# Simulated turnout under male-only (w0) and female-candidate (w1) scenarios.
w0.SG <- sim_group_turnout(SG, 0)
w1.SG <- sim_group_turnout(SG, 1)
w0.BB <- sim_group_turnout(BB, 0)
w1.BB <- sim_group_turnout(BB, 1)
w0.GX <- sim_group_turnout(GX, 0)
w1.GX <- sim_group_turnout(GX, 1)
w0.M <- sim_group_turnout(M, 0)
w1.M <- sim_group_turnout(M, 1)

effect.SG <- w1.SG - w0.SG
quantile(effect.SG, c(.025, .975)) #to get CIs; -0.014007059 -0.008933538
mean(effect.SG) #to get mean effect= -0.01140599

effect.BB <- w1.BB - w0.BB
quantile(effect.BB, c(.025, .975)) #to get CIs;  -0.006801758 -0.003459274
mean(effect.BB) #to get mean effect= -0.005093274

effect.GX <- w1.GX - w0.GX
quantile(effect.GX, c(.025, .975)) #to get CIs; -0.01582452 -0.01254737
mean(effect.GX) #to get mean effect -0.01421268

effect.M <- w1.M - w0.M
quantile(effect.M, c(.025, .975)) #to get CIs; -0.011790170 -0.009145106
mean(effect.M) #to get mean effect= -0.01046261

# Effect by gender subgroup.
Men <- subset(sample, female == 0)
Women <- subset(sample, female == 1)

# The Men vectors get their own names (w0.Men / w1.Men) so they no longer
# overwrite the Millennial vectors w0.M / w1.M computed above.
w0.F <- sim_group_turnout(Women, 0)
w1.F <- sim_group_turnout(Women, 1)
w0.Men <- sim_group_turnout(Men, 0)
w1.Men <- sim_group_turnout(Men, 1)

effect.Men <- w1.Men - w0.Men
quantile(effect.Men, c(.025, .975)) #to get CIs;  -0.009830588 -0.007145311
mean(effect.Men) # mean effect=  -0.008448452

effect.Women <- w1.F - w0.F
quantile(effect.Women, c(.025, .975)) #to get CIs;  -0.011945322 -0.009264748
mean(effect.Women) #mean effect= -0.01064033

# Store the Effects.
# One entry per registrant subgroup, in this order: Overall, Women, Men,
# Gen. Z, Millennials, Gen. X, Baby Boomers, Silent Gen. No Gen. Z effect is
# estimated in this script, so its entries are NA.
Election <- rep("2014 Primary", 8)
Effect <- c(
  meaneffect,
  mean(effect.Women),
  mean(effect.Men),
  NA,
  mean(effect.M),
  mean(effect.GX),
  mean(effect.BB),
  mean(effect.SG)
)
LowerCI <- c(
  lowerci,
  quantile(effect.Women, 0.025),
  quantile(effect.Men, 0.025),
  NA,
  quantile(effect.M, 0.025),
  quantile(effect.GX, 0.025),
  quantile(effect.BB, 0.025),
  quantile(effect.SG, 0.025)
)
UpperCI <- c(
  upperci,
  quantile(effect.Women, 0.975),
  quantile(effect.Men, 0.975),
  NA,
  quantile(effect.M, 0.975),
  quantile(effect.GX, 0.975),
  quantile(effect.BB, 0.975),
  quantile(effect.SG, 0.975)
)
WAprimary <- data.frame(Effect, UpperCI, LowerCI, Election)

# Ordered factor so the subgroups keep this order wherever the factor is used.
WAprimary$`Registrant Subgroup` <- factor(
  seq_len(8),
  levels = seq_len(8),
  labels = c("Overall", "Women", "Men", "Gen. Z", "Millennials", "Gen. X", "Baby\nBoomers", "Silent Gen."),
  ordered = TRUE
)

# Write effects to a CSV file in the working directory.
write.csv(WAprimary, "WA2014PrimaryEffects.csv", row.names = FALSE)

